Slip 22


Q.1. Write a Python program to implement simple linear regression for predicting
house prices.

# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt

# Simple linear regression script: fits Price ~ Area on a CSV dataset,
# reports test-set metrics, plots the fit, and predicts one example value.

# Step 1: Load the dataset
# (Replace 'house_prices.csv' with the path to your actual dataset file)
df = pd.read_csv("house_prices.csv")

print("✅ Dataset loaded successfully!\n")
print("First 5 rows:\n", df.head(), "\n")

# Step 2: Define features (X) and target (y)
# Assumes the dataset has the columns 'Area' (sq ft) and 'Price';
# modify the column names to match your dataset.
X = df[['Area']]   # independent variable (kept 2-D: a one-column DataFrame)
y = df['Price']    # dependent variable

# Step 3: Split the dataset into training and testing sets (80%-20%)
# random_state is fixed so the split is reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 4: Create and train the Linear Regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Step 5: Make predictions on the held-out test set
y_pred = model.predict(X_test)

# Step 6: Evaluate the model on the test set
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("📊 Model Evaluation:")
print("Mean Squared Error:", round(mse, 2))
print("R² Score:", round(r2, 4))
print("Intercept (b0):", round(model.intercept_, 2))
print("Slope (b1):", round(model.coef_[0], 2))

# Step 7: Visualize the results (test points vs. the fitted line)
plt.scatter(X_test, y_test, color='blue', label='Actual Prices')
plt.plot(X_test, y_pred, color='red', linewidth=2, label='Regression Line')
plt.title("Simple Linear Regression - House Price Prediction")
plt.xlabel("Area (sq ft)")
plt.ylabel("Price")
plt.legend()
plt.show()

# Step 8: Example prediction for new input
area_value = [[2500]]  # Example: 2500 sq ft
# The model was fitted on a DataFrame, so scikit-learn remembers the feature
# name. Predicting from a bare list triggers the "X does not have valid
# feature names" warning; wrap the value in a DataFrame with the same columns.
new_sample = pd.DataFrame(area_value, columns=X.columns)
predicted_price = model.predict(new_sample)
print(f"\n🏠 Predicted price for {area_value[0][0]} sq ft = ${predicted_price[0]:.2f}")

Q.2. Use the Apriori algorithm on a groceries dataset to find which items are bought
together. Use minimum support = 0.25.

import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder
import warnings

# Market-basket analysis script: one-hot encodes a small list of grocery
# transactions, mines frequent itemsets with Apriori (min support 0.25),
# and prints the association rules whose lift is at least 1.0.

# Suppress RuntimeWarning (e.g. division by zero while computing rule metrics)
warnings.simplefilter("ignore", category=RuntimeWarning)

# -------------------------------
# 1. Transactions (one inner list per basket)
# -------------------------------
dataset = [
    ['milk', 'bread', 'eggs'],
    ['bread', 'butter'],
    ['milk', 'bread', 'butter', 'eggs'],
    ['bread', 'eggs'],
    ['milk', 'bread', 'butter'],
]

# -------------------------------
# 2. One-hot encode the baskets
# -------------------------------
te = TransactionEncoder()
te.fit(dataset)
te_ary = te.transform(dataset)
df = pd.DataFrame(te_ary, columns=te.columns_)

print("One-hot encoded dataset:", df, sep="\n")

# -------------------------------
# 3. Frequent itemsets via Apriori
# -------------------------------
basket_flags = df.astype(bool)  # apriori expects boolean item indicators
frequent_itemsets = apriori(basket_flags, min_support=0.25, use_colnames=True)

print("\nFrequent Itemsets:", frequent_itemsets, sep="\n")

# -------------------------------
# 4. Association rules (lift >= 1.0), discarding rows that contain NaN
# -------------------------------
rules = association_rules(
    frequent_itemsets,
    metric="lift",
    min_threshold=1.0,
).dropna()

shown_columns = ['antecedents', 'consequents', 'support', 'confidence', 'lift']
print("\nAssociation Rules:", rules[shown_columns], sep="\n")
